options(warn=-1)
library(data.table)
suppressMessages(library(limma))
suppressMessages(library(ENmix))
suppressMessages(library(sva))
suppressMessages(library(ChAMP))
options(warn=0)

Exploring global DNA Methylation variability via PCs

# Let's look at the effect of sex
plotMDS(beta, top=10000, gene.selection="common",
        pch=17,col=c("deeppink","blue")[factor(pheno$sex)],
        dim=c(1,2),cex=1.5)
legend("center", legend=levels(factor(pheno$sex)),bty='n',
       cex=1.5,pch=17,col=c("deeppink","blue"))

# Let's look at the effect of sex but let's remove the X and Y chromosomes
betas.clean = beta[manifest[probe_type=="cg" & !chr %in% c("X","Y")]$index,]
plotMDS(betas.clean, top=10000, gene.selection="common",
        pch=17,col=c("deeppink","blue")[factor(pheno$sex)],
        dim=c(1,2),cex=1.5)
legend("center", legend=levels(factor(pheno$sex)),bty='n',
       cex=1.5,pch=17,col=c("deeppink","blue"))

# Let's look at the effect of technical variables
# Row
cols <- rainbow(n = length(levels(factor(pheno$row))))
plotMDS(betas.clean, top=10000, gene.selection="common",
        pch=17,col=cols[factor(pheno$row)],
        dim=c(1,2),cex=1.5)
legend("right", legend=levels(factor(pheno$row)),bty='n',
       cex=1.5,pch=17,col=cols)

# If clustering by technical variables is present, batch effects can be adjusted for using ComBat or using covariates for batch in downstream analyses.
# Example of ComBat
# Impute missing Beta-values (ComBat will produce an error with missingness)
# sum(is.na(betas.clean))
# betas.impute = champ.impute(beta=betas.clean, pd=pheno, k=5, ProbeCutoff=0.2, SampleCutoff=0.1)
# betas.impute = betas.impute$beta
# Run ComBat
# batch <- factor(pheno$Sentrix_ID)
# modcombat <- model.matrix(~1, data = pheno)
# betaCombat <- ComBat(dat = as.matrix(betas.impute), batch = batch, mod = modcombat)

It would be useful to look at several traits with global variability

cov<-data.frame(pheno[,c('smoker','sex','CD4','CD8','NK','MO','GR','B')])
npc <- 20 # Top 20 PCs
svd <- prcomp(t(na.omit(betas.clean)))
screeplot(svd, npc, type = "barplot")

eigenvalue <- svd[["sdev"]]^2
prop <- (sum(eigenvalue[1:npc])/sum(eigenvalue)) * 100
cat("Top ", npc, " principal components can explain ", 
    prop, "% of data \n    variation", "\n")
## Top  20  principal components can explain  81.99883 % of data 
##     variation
pcrplot(na.omit(betas.clean), cov, npc=10) # Already saved in working directory
## Analysis is running, please wait...!
## svdscreeplot.jpg was plotted 
## Top  10  principal components can explain  68.58916 % of data 
##     variation
## pcr_diag.jpg was plotted

Epigenetic Age

Load package for Age-Prediction

options(warn=-1)
suppressMessages(library(wateRmelon))
options(warn=0)
DNAmAge<-agep(beta)$horvath.age
hist(DNAmAge)

boxplot(DNAmAge);stripchart(DNAmAge, vertical = T,method = "jitter", add = T, pch = 20, col = 'red')

Agreement between Horvath’s Epigenetic age and Hannum’s clock

data(hannumCoef)
length(hannumCoef)
## [1] 71
DNAmAge.Hannum<-agep(beta,coeff=hannumCoef,method = "hannum")$custom_age

Correlation; agreement

cor.test(DNAmAge,DNAmAge.Hannum)
## 
##  Pearson's product-moment correlation
## 
## data:  DNAmAge and DNAmAge.Hannum
## t = 13.433, df = 28, p-value = 9.949e-14
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  0.8576226 0.9666606
## sample estimates:
##       cor 
## 0.9304164
plot(DNAmAge.Hannum,DNAmAge,pch=21,ylab="Hortvath's DNAm Age",
     xlab="Hannum's DNAm Age",cex=1.2, bg=alpha("deepskyblue",0.45),main="Epigenetic Clocks")
legend("topleft",legend=paste0("r=",round(cor(DNAmAge.Hannum,DNAmAge),2)),bty="n")
abline(lm(DNAmAge~DNAmAge.Hannum),col="red",lw=2)

Age Acceleration Residuals

AgeAccelerationResidual<-residuals(lm(DNAmAge.Hannum~DNAmAge))
boxplot(AgeAccelerationResidual ~pheno$smoker, col=c("blue","red"))

wilcox.test(AgeAccelerationResidual ~ pheno$smoker)
## 
##  Wilcoxon rank sum exact test
## 
## data:  AgeAccelerationResidual by pheno$smoker
## W = 91, p-value = 0.3892
## alternative hypothesis: true location shift is not equal to 0

Differences by Smoking status

boxplot(DNAmAge ~pheno$smoker, col=c("blue","red"))

wilcox.test(DNAmAge ~ pheno$smoker)
## 
##  Wilcoxon rank sum exact test
## 
## data:  DNAmAge by pheno$smoker
## W = 102, p-value = 0.6827
## alternative hypothesis: true location shift is not equal to 0
boxplot(DNAmAge.Hannum ~pheno$smoker, col=c("blue","red"))

wilcox.test(DNAmAge.Hannum ~ pheno$smoker)
## 
##  Wilcoxon rank sum exact test
## 
## data:  DNAmAge.Hannum by pheno$smoker
## W = 93, p-value = 0.4363
## alternative hypothesis: true location shift is not equal to 0

Load Age Acceleration measures See Horvath New Methylation Age Calculator.

# Online<-read.csv("Clock/datout_New.output.csv")
Online<- Online[match(pheno$gsm, Online$SampleID),]
group <- NA
group[pheno$smoker=="smoker"] <- 1
group[pheno$smoker=="non-smoker"] <- 2
pairs(~DNAmAge + DNAmPhenoAge + DNAmAgeSkinBloodClock + 
    DNAmGrimAge + DNAmTL + DNAmAgeHannum, 
    col = c("red","blue")[group],
    pch = c(8, 18)[group], 
    data = Online)

Look at GrimAge Acceleration

boxplot(Online$DNAmGrimAgeAdjAge ~pheno$smoker, col=c("blue","red"))

wilcox.test(Online$DNAmGrimAgeAdjAge ~ pheno$smoker)
## 
##  Wilcoxon rank sum exact test
## 
## data:  Online$DNAmGrimAgeAdjAge by pheno$smoker
## W = 2, p-value = 5.157e-08
## alternative hypothesis: true location shift is not equal to 0

Pack years predicted from DNA methylation

boxplot(Online$DNAmPACKYRS ~pheno$smoker, col=c("blue","red"))

wilcox.test(Online$DNAmPACKYRS ~ pheno$smoker)
## 
##  Wilcoxon rank sum exact test
## 
## data:  Online$DNAmPACKYRS by pheno$smoker
## W = 3, p-value = 9.025e-08
## alternative hypothesis: true location shift is not equal to 0

DNA methylation estimate of TL

boxplot(Online$DNAmTLAdjAge ~pheno$smoker, col=c("blue","red"))

t.test(Online$DNAmTLAdjAge ~ pheno$smoker)
## 
##  Welch Two Sample t-test
## 
## data:  Online$DNAmTLAdjAge by pheno$smoker
## t = 3.5366, df = 27.251, p-value = 0.001473
## alternative hypothesis: true difference in means between group non-smoker and group smoker is not equal to 0
## 95 percent confidence interval:
##  0.08360011 0.31441308
## sample estimates:
## mean in group non-smoker     mean in group smoker 
##                0.0995033               -0.0995033

End of script 03